Create a contrast matrix
Now define contrasts to compare metastatic groups to the primary
group.
contrast.matrix <- makeContrasts(
skin_vs_primary = skin - primary,
lung_vs_primary = lung - primary,
liver_vs_primary = liver - primary,
lymph_node_vs_primary = lymph_node - primary,
pleural_effusion_vs_primary = pleural_effusion - primary,
central_nervous_system_vs_primary = central_nervous_system - primary,
ascites_vs_primary = ascites - primary,
fibroblast_vs_primary = fibroblast - primary,
uvea_vs_primary = uvea - primary,
sinonasal_vs_primary = sinonasal - primary,
levels = design)
Filter out low count genes because they add to the noise of the
data
log_tpm <- TPM_22Q2_mat
keep <- rowMeans(2^log_tpm - 1) > 1 # Back-transform and filter
log_tpm <- log_tpm[keep, ]
Fit Model
# Fit linear model directly to log2(TPM+1)
fit <- lmFit(log_tpm, design)
fit2 <- contrasts.fit(fit, contrast.matrix)
# Empirical Bayes moderation
fit2 <- eBayes(fit2)
# Extract top differentially expressed genes
skin_de <- topTable(fit2, coef = "skin_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
lung_de <- topTable(fit2, coef = "lung_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
liver_de <- topTable(fit2, coef = "liver_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
lymph_de <- topTable(fit2, coef = "lymph_node_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
pe_de <- topTable(fit2, coef = "pleural_effusion_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
cns_de <- topTable(fit2, coef = "central_nervous_system_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
ascite_de <- topTable(fit2, coef = "ascites_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
fibro_de <- topTable(fit2, coef = "fibroblast_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
uvea_de <- topTable(fit2, coef = "uvea_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
sinonasal_de <- topTable(fit2, coef = "sinonasal_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
Volcano Plots
A Volcano plot (log2 fold change vs -log10 p-value) helps identify
genes that display large magnitude changes and high significance.